Fit density models to cod of different sizes and calculate spatial overlap with prey

Author

Max Lindmark

Published

August 26, 2023

Load packages & source functions

Code
# Using dev branch of sdmTMB
# remotes::install_github("pbs-assess/sdmTMB", dependencies = TRUE)

# with_libpaths(new = "/Users/maxlindmark/Dropbox/Max work/R/sdmTMB-zeta-intercept/",
#               install_github("pbs-assess/sdmTMB", ref = "zeta-intercept"))

library(sdmTMB, lib.loc = "/Users/maxlindmark/Dropbox/Max work/R/sdmTMB-zeta-intercept")

pkgs <- c("tidyverse", "RCurl", "viridis", "devtools", "terra", "readxl", "mapplots", "tidylog") 

# minpack.lm needed if using nlsLM()
if(length(setdiff(pkgs,rownames(installed.packages()))) > 0){

    install.packages(setdiff(pkgs, rownames(installed.packages())), dependencies = T)
  
  }

invisible(lapply(pkgs, library, character.only = T))

# Packages not on CRAN or dev version
# remotes::install_github("pbs-assess/sdmTMB", dependencies = TRUE)
library(sdmTMB)

# devtools::install_github("seananderson/ggsidekick") # not on CRAN 
library(ggsidekick)
theme_set(theme_sleek(base_size = 20))

# Source code for map plots
# You need: # devtools::install_github("seananderson/ggsidekick") # not on CRAN; library(ggsidekick)
# Source code for map plots
# You need: # devtools::install_github("seananderson/ggsidekick") # not on CRAN; library(ggsidekick)
devtools::source_url("https://raw.githubusercontent.com/maxlindmark/pred-prey-overlap/main/R/functions/map-plot.R")

options(ggplot2.continuous.colour = "viridis")

# Source overlap functions
devtools::source_url("https://raw.githubusercontent.com/maxlindmark/pred-prey-overlap/main/R/functions/overlap-indices.R")

# Set path
home <- here::here()

Read spatiotemporal predictions from 01-fit-sdm.qmd

# Use only one of them because they are so similar
cod_pred <- read_csv(paste0(home, "/output/pred_cod.csv"))
Rows: 943428 Columns: 38
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (3): substrate, month_year, ices_rect
dbl (35): X, Y, year, lon, lat, depth, quarter, month, oxy, temp, sal, sub_d...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Add in prey to density data

Saduria

# TODO: Update years!?
saduria <- terra::rast(paste0(home, "/data/saduria-tif/FWBiomassm_raster_19812019presHighweightcor_no0_newZi.tif"))

WGS84 <- "+proj=longlat +datum=WGS84"

saduria_latlon <- terra::project(saduria, WGS84)

density_saduria <- terra::extract(saduria_latlon, cod_pred %>% dplyr::select(lon, lat), method = "bilinear")

cod_pred$density_saduria <- density_saduria$FWBiomassm_raster_19812019presHighweightcor_no0_newZi

plot_map_fc + 
  geom_raster(data = cod_pred |> filter(year == 1999 & quarter == 4), aes(X*1000, Y*1000, fill = density_saduria)) +
  scale_fill_viridis(trans = "sqrt", name = "Saduria biomass density")
filter: removed 927,162 rows (98%), 16,266 rows remaining
Warning: Removed 761 rows containing missing values (`geom_raster()`).

ggsave(paste0(home, "/figures/supp/spatial_saduria_biomass.pdf"), width = 17, height = 17, units = "cm")
Warning: Removed 761 rows containing missing values (`geom_raster()`).

Pelagics

# TODO: Update years!
# Read data on rectangle level
spr <- read_xlsx(paste0(home, "/data/BIAS/N and B per Rect. 1991-2020.xlsx"),
                 sheet = 4) |>
  mutate(sub_div = ifelse(Sub_Div == "28_2", "28", Sub_Div)) |> 
  filter(sub_div %in% c("24", "25", "26", "27", "28")) |> 
  rename("ices_rect" = "RECT",
         "Year" = "ANNUS") |>
  mutate_at(vars(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`), ~replace_na(., 0)) |> # I need to replace NA with 0, else I can't sum! According to Olavi who sent the data, NA means 0 and nothing else. Rectangle*year combinations that do not have information about biomass are simply not included in this data
  mutate(ices_rect = as.factor(ices_rect),
         Species = "Sprat",
         biomass_spr = `1`+`2`+`3`+`4`+`5`+`6`+`7`+`8`, 
         IDr = paste(ices_rect, Year, sep = "."),
         lon = ices.rect(ices_rect)$lon,
         lat = ices.rect(ices_rect)$lat) |> # Make new ID
  filter(Year > 1992 & Year < 2020)
mutate: new variable 'sub_div' (character) with 12 unique values and 0% NA
filter: removed 1,330 rows (46%), 1,585 rows remaining
rename: renamed 2 variables (Year, ices_rect)
mutate_at: changed 60 values (4%) of '1' (60 fewer NA)
           changed 42 values (3%) of '2' (42 fewer NA)
           changed 32 values (2%) of '3' (32 fewer NA)
           changed 48 values (3%) of '4' (48 fewer NA)
           changed 72 values (5%) of '5' (72 fewer NA)
           changed 160 values (10%) of '6' (160 fewer NA)
           changed 437 values (28%) of '7' (437 fewer NA)
           changed 543 values (34%) of '8' (543 fewer NA)
mutate: converted 'ices_rect' from character to factor (0 new NA)
        new variable 'Species' (character) with one unique value and 0% NA
        new variable 'biomass_spr' (double) with 1,571 unique values and 0% NA
        new variable 'IDr' (character) with 1,563 unique values and 0% NA
        new variable 'lon' (double) with 10 unique values and 0% NA
        new variable 'lat' (double) with 10 unique values and 0% NA
filter: removed 147 rows (9%), 1,438 rows remaining
spr <- spr |> add_utm_columns(ll_names = c("lon", "lat"),
                              utm_crs = 32633)

her <- read_xlsx(paste0(home, "/data/BIAS/N and B per Rect. 1991-2020.xlsx"),
                 sheet = 3) |>
  mutate(sub_div = ifelse(Sub_Div == "28_2", "28", Sub_Div)) |> 
  filter(sub_div %in% c("24", "25", "26", "27", "28")) |> 
  rename("ices_rect" = "RECT",
         "Year" = "ANNUS") |>
  mutate_at(vars(`1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`), ~replace_na(., 0)) |>
  mutate(ices_rect = as.factor(ices_rect),
         Species = "Herring",
         biomass_her = `1`+`2`+`3`+`4`+`5`+`6`+`7`+`8`, 
         IDr = paste(ices_rect, Year, sep = "."),
         lon = ices.rect(ices_rect)$lon,
         lat = ices.rect(ices_rect)$lat) |> # Make new ID
  filter(Year > 1992 & Year < 2020)
mutate: new variable 'sub_div' (character) with 12 unique values and 0% NA
filter: removed 1,330 rows (46%), 1,585 rows remaining
rename: renamed 2 variables (Year, ices_rect)
mutate_at: changed 66 values (4%) of '1' (66 fewer NA)
           changed 31 values (2%) of '2' (31 fewer NA)
           changed 25 values (2%) of '3' (25 fewer NA)
           changed 25 values (2%) of '4' (25 fewer NA)
           changed 27 values (2%) of '5' (27 fewer NA)
           changed 55 values (3%) of '6' (55 fewer NA)
           changed 172 values (11%) of '7' (172 fewer NA)
           changed 317 values (20%) of '8' (317 fewer NA)
mutate: converted 'ices_rect' from character to factor (0 new NA)
        new variable 'Species' (character) with one unique value and 0% NA
        new variable 'biomass_her' (double) with 1,574 unique values and 0% NA
        new variable 'IDr' (character) with 1,563 unique values and 0% NA
        new variable 'lon' (double) with 10 unique values and 0% NA
        new variable 'lat' (double) with 10 unique values and 0% NA
filter: removed 147 rows (9%), 1,438 rows remaining
her <- her |> add_utm_columns(ll_names = c("lon", "lat"),
                              utm_crs = 32633)

# Plot distribution over time in the whole area
plot_map_fc + 
  geom_point(data = spr, aes(X*1000, Y*1000, color = biomass_spr), size = 1.5) +
  facet_wrap(~Year) + 
  scale_color_viridis(trans = "sqrt", name = "Sprat biomass (tonnes)")
Warning: Removed 157 rows containing missing values (`geom_point()`).

plot_map_fc + 
  geom_point(data = her, aes(X*1000, Y*1000, color = biomass_her), size = 1.5) +
  facet_wrap(~Year) + 
  scale_color_viridis(trans = "sqrt", name = "Herring biomass (tonnes)")
Warning: Removed 157 rows containing missing values (`geom_point()`).

# How many unique rows per IDr?
her |>
  group_by(IDr) |> 
  mutate(n = n()) |> 
  ggplot(aes(factor(n))) + geom_bar()
group_by: one grouping variable (IDr)
mutate (grouped): new variable 'n' (integer) with 2 unique values and 0% NA

spr |>
  group_by(IDr) |> 
  mutate(n = n()) |> 
  ggplot(aes(factor(n))) + geom_bar()
group_by: one grouping variable (IDr)
mutate (grouped): new variable 'n' (integer) with 2 unique values and 0% NA

# Ok, some ID's with two rows...
spr |>
  group_by(IDr) |> 
  mutate(n = n()) |> 
  filter(n == 2) |> 
  ungroup() |> 
  as.data.frame() |> 
  head(20)
group_by: one grouping variable (IDr)
mutate (grouped): new variable 'n' (integer) with 2 unique values and 0% NA
filter (grouped): removed 1,398 rows (97%), 40 rows remaining
ungroup: no grouping variables
   Year Sub_Div ices_rect         0         1         2         3         4
1  1994      24      39G4  320.6336  166.2724  712.1302 4903.8297 4410.8288
2  1994      25      39G4    5.2000    0.0000   57.0900  134.0325  119.1141
3  2001      24      39G4 2225.9488 1173.8470 3273.5995 1617.4003 1969.9090
4  2001      25      39G4  278.1660  130.9860  937.9096    0.0000 1292.6890
5  2002      24      39G4 1384.7183 6097.1191 1101.2682 1235.1920  526.4083
6  2002      25      39G4    7.6246   56.6614   15.2600   63.5376    6.2700
7  2003      24      39G4 1118.4120 1789.4837 2596.2783 1444.8346  784.1097
8  2003      25      39G4  330.2208 2933.2882 7655.7000 1919.7420 1614.0300
9  2004      24      39G4    2.9223 5473.5987 1253.9853  839.8116  502.0176
10 2004      25      39G4        NA  461.8666  329.2650   41.5800  107.8000
11 2005      24      39G4    0.1534   79.2224  742.3837  247.2505  135.1516
12 2005      25      39G4        NA   80.4000 3641.7421 1065.3212  395.0400
13 2006      24      39G4 2936.5816 2446.2891 1190.8490 2350.5299  661.5522
14 2006      25      39G4    7.3150   68.3306   87.3600  687.0052  125.1660
15 2007      24      39G4    6.8240 3686.9296 1219.2915 1803.8780  414.4900
16 2007      25      39G4        NA 2014.6113  917.6975    0.0000  288.2040
17 2008      24      39G4 1121.0336 1024.4382 5367.2063 3013.6222 1187.4544
18 2008      25      39G4   10.2600   95.6142 1152.3245   44.4235   16.3200
19 2009      24      39G4   16.3404 1707.2880 1987.7076 1672.8639  152.6547
20 2009      25      39G4    2.7500  683.8874  321.7600 1246.5090  118.5850
          5        6        7        8 sub_div Species biomass_spr       IDr
1  917.8628 286.2860   0.0000   0.0000      24   Sprat  11397.2100 39G4.1994
2  157.8990  68.0775   0.0000  33.4000      25   Sprat    569.6131 39G4.1994
3  305.7184 465.1775  76.4460  87.7200      24   Sprat   8969.8178 39G4.2001
4  175.6150 394.4763  83.7000  96.0500      25   Sprat   3111.4259 39G4.2001
5  506.3207 297.7496   6.9768  35.1428      24   Sprat   9806.1775 39G4.2002
6   21.3360   2.1000  10.7100   7.8300      25   Sprat    183.7050 39G4.2002
7   78.4584  79.1370 121.8612  26.0760      24   Sprat   6920.2389 39G4.2003
8  952.5050  75.9600 303.1200   0.0000      25   Sprat  15454.3452 39G4.2003
9  110.4840  98.0056  31.2294  31.2294      24   Sprat   8340.3617 39G4.2004
10   0.0000   0.0000  21.8400  12.3200      25   Sprat    974.6716 39G4.2004
11  19.5444   0.0000   0.0000  25.8071      24   Sprat   1249.3597 39G4.2005
12   0.0000  32.3000   0.0000   0.0000      25   Sprat   5214.8033 39G4.2005
13 182.1606  61.5351   0.0000   0.0000      24   Sprat   6892.9159 39G4.2006
14   9.6600   0.0000   0.0000 134.1756      25   Sprat   1111.6974 39G4.2006
15  76.1760  12.5400   0.0000   0.0000      24   Sprat   7213.3050 39G4.2007
16  37.6000  63.5400   0.0000  98.0800      25   Sprat   3419.7328 39G4.2007
17 422.8412  61.9743  61.9743   0.0000      24   Sprat  11139.5108 39G4.2008
18 106.0371  73.7800   0.0000   0.0000      25   Sprat   1488.4993 39G4.2008
19 222.6978  25.3616  11.2536  25.3616      24   Sprat   5805.1888 39G4.2009
20  11.5500  77.8500   0.0000  91.4400      25   Sprat   2551.5814 39G4.2009
    lon   lat        X        Y n
1  14.5 55.25 468.2151 6122.726 2
2  14.5 55.25 468.2151 6122.726 2
3  14.5 55.25 468.2151 6122.726 2
4  14.5 55.25 468.2151 6122.726 2
5  14.5 55.25 468.2151 6122.726 2
6  14.5 55.25 468.2151 6122.726 2
7  14.5 55.25 468.2151 6122.726 2
8  14.5 55.25 468.2151 6122.726 2
9  14.5 55.25 468.2151 6122.726 2
10 14.5 55.25 468.2151 6122.726 2
11 14.5 55.25 468.2151 6122.726 2
12 14.5 55.25 468.2151 6122.726 2
13 14.5 55.25 468.2151 6122.726 2
14 14.5 55.25 468.2151 6122.726 2
15 14.5 55.25 468.2151 6122.726 2
16 14.5 55.25 468.2151 6122.726 2
17 14.5 55.25 468.2151 6122.726 2
18 14.5 55.25 468.2151 6122.726 2
19 14.5 55.25 468.2151 6122.726 2
20 14.5 55.25 468.2151 6122.726 2
# It's because rectangles somehow being in different sub divisions.
# I need to group by IDr and summarize
spr_sum <- spr |>
  group_by(IDr) |> 
  summarise(biomass_spr = sum(biomass_spr)) |> # Sum abundance within IDr
  distinct(IDr, .keep_all = TRUE) |> # Remove duplicate IDr
  mutate(ID_temp = IDr) |> # Create temporary IDr that we can use to split in order
  # to get Year and StatRect back into the summarized data
  separate(ID_temp, c("StatRec", "Year"), sep = 4)
group_by: one grouping variable (IDr)
summarise: now 1,418 rows and 2 columns, ungrouped
distinct: no rows removed
mutate: new variable 'ID_temp' (character) with 1,418 unique values and 0% NA
nrow(spr_sum) 
[1] 1418
nrow(spr)
[1] 1438
nrow(spr |> group_by(IDr) |> mutate(n = n()) |> filter(n == 2))
group_by: one grouping variable (IDr)
mutate (grouped): new variable 'n' (integer) with 2 unique values and 0% NA
filter (grouped): removed 1,398 rows (97%), 40 rows remaining
[1] 40
# Check with a specific rectangle
filter(spr_sum, IDr == "39G4.1991")
filter: removed all rows (100%)
# A tibble: 0 × 4
# ℹ 4 variables: IDr <chr>, biomass_spr <dbl>, StatRec <chr>, Year <chr>
filter(spr, IDr == "39G4.1991")
filter: removed all rows (100%)
# A tibble: 0 × 20
# ℹ 20 variables: Year <dbl>, Sub_Div <chr>, ices_rect <fct>, 0 <dbl>, 1 <dbl>,
#   2 <dbl>, 3 <dbl>, 4 <dbl>, 5 <dbl>, 6 <dbl>, 7 <dbl>, 8 <dbl>,
#   sub_div <chr>, Species <chr>, biomass_spr <dbl>, IDr <chr>, lon <dbl>,
#   lat <dbl>, X <dbl>, Y <dbl>
# This should equal 1 (new # rows =  old - duplicated IDr)
nrow(spr_sum) / (nrow(spr) - 0.5*nrow(spr |> group_by(IDr) |> mutate(n = n()) |> filter(n == 2)))
group_by: one grouping variable (IDr)
mutate (grouped): new variable 'n' (integer) with 2 unique values and 0% NA
filter (grouped): removed 1,398 rows (97%), 40 rows remaining
[1] 1
# How many rows per rectangle?
spr_sum |>
  group_by(IDr) |> 
  mutate(n = n()) |> 
  ungroup() |> 
  distinct(n)
group_by: one grouping variable (IDr)
mutate (grouped): new variable 'n' (integer) with one unique value and 0% NA
ungroup: no grouping variables
distinct: removed 1,417 rows (>99%), one row remaining
# A tibble: 1 × 1
      n
  <int>
1     1
# Now do the same for herring
her_sum <- her |>
  group_by(IDr) |> 
  summarise(biomass_her = sum(biomass_her)) |> # Sum abundance within IDr
  distinct(IDr, .keep_all = TRUE) |> # Remove duplicate IDr
  mutate(ID_temp = IDr) |> # Create temporary IDr that we can use to split in order
  # to get Year and StatRect back into the summarized data
  separate(ID_temp, c("StatRec", "Year"), sep = 4)
group_by: one grouping variable (IDr)
summarise: now 1,418 rows and 2 columns, ungrouped
distinct: no rows removed
mutate: new variable 'ID_temp' (character) with 1,418 unique values and 0% NA
nrow(her_sum) 
[1] 1418
nrow(her)
[1] 1438
nrow(her |> group_by(IDr) |> mutate(n = n()) |> filter(n == 2))
group_by: one grouping variable (IDr)
mutate (grouped): new variable 'n' (integer) with 2 unique values and 0% NA
filter (grouped): removed 1,398 rows (97%), 40 rows remaining
[1] 40
filter(her_sum, IDr == "39G4.1991")
filter: removed all rows (100%)
# A tibble: 0 × 4
# ℹ 4 variables: IDr <chr>, biomass_her <dbl>, StatRec <chr>, Year <chr>
filter(her, IDr == "39G4.1991")
filter: removed all rows (100%)
# A tibble: 0 × 20
# ℹ 20 variables: Year <dbl>, Sub_Div <chr>, ices_rect <fct>, 0 <dbl>, 1 <dbl>,
#   2 <dbl>, 3 <dbl>, 4 <dbl>, 5 <dbl>, 6 <dbl>, 7 <dbl>, 8 <dbl>,
#   sub_div <chr>, Species <chr>, biomass_her <dbl>, IDr <chr>, lon <dbl>,
#   lat <dbl>, X <dbl>, Y <dbl>
# This should equal 1 (new # rows =  old - duplicated IDr)
nrow(her_sum) / (nrow(her) - 0.5*nrow(her |> group_by(IDr) |> mutate(n = n()) |> filter(n == 2)))
group_by: one grouping variable (IDr)
mutate (grouped): new variable 'n' (integer) with 2 unique values and 0% NA
filter (grouped): removed 1,398 rows (97%), 40 rows remaining
[1] 1
# How many rows per rectangle?
her_sum |>
  group_by(IDr) |> 
  mutate(n = n()) |> 
  ungroup() |> 
  distinct(n)
group_by: one grouping variable (IDr)
mutate (grouped): new variable 'n' (integer) with one unique value and 0% NA
ungroup: no grouping variables
distinct: removed 1,417 rows (>99%), one row remaining
# A tibble: 1 × 1
      n
  <int>
1     1
# Join pelagic data
pel <- left_join(her_sum, spr_sum)
Joining with `by = join_by(IDr, StatRec, Year)`
left_join: added one column (biomass_spr)
           > rows only in x       0
           > rows only in y  (    0)
           > matched rows     1,418
           >                 =======
           > rows total       1,418
cod_pred <- cod_pred |>
  mutate(IDr = paste(ices_rect, year, sep = ".")) |> 
  left_join(pel)
mutate: new variable 'IDr' (character) with 1,769 unique values and 0% NA
Joining with `by = join_by(IDr)`
left_join: added 4 columns (biomass_her, StatRec, Year, biomass_spr)
> rows only in x 131,190
> rows only in y ( 54)
> matched rows 812,238
> =========
> rows total 943,428
names(cod_pred)
 [1] "X"                  "Y"                  "year"              
 [4] "lon"                "lat"                "depth"             
 [7] "substrate"          "quarter"            "month"             
[10] "month_year"         "oxy"                "temp"              
[13] "sal"                "sub_div"            "ices_rect"         
[16] "depth_sc"           "depth_sq"           "temp_sc"           
[19] "temp_sq"            "oxy_sc"             "sal_sc"            
[22] "year_f"             "quarter_f"          "est1"              
[25] "est2"               "est_non_rf1"        "est_non_rf2"       
[28] "est_rf1"            "est_rf2"            "omega_s1"          
[31] "omega_s2"           "zeta_s_quarter_f11" "zeta_s_quarter_f12"
[34] "zeta_s_quarter_f41" "zeta_s_quarter_f42" "epsilon_st1"       
[37] "epsilon_st2"        "est"                "density_saduria"   
[40] "IDr"                "biomass_her"        "StatRec"           
[43] "Year"               "biomass_spr"       
# If NA on the rectangle level, take the average values across ices rectangles *within* the sub-division
# If no rectangles within the SD, take the annual mean
cod_pred <- cod_pred |> 
  group_by(year, sub_div) |> 
  mutate(biomass_her_year_sd_mean = mean(biomass_her, na.rm = TRUE),
         biomass_spr_year_sd_mean = mean(biomass_spr, na.rm = TRUE)) |> 
  ungroup() |>
  group_by(year) |> 
  mutate(biomass_her_year_mean = mean(biomass_her, na.rm = TRUE),
         biomass_spr_year_mean = mean(biomass_spr, na.rm = TRUE)) |> 
  ungroup() |> 
  mutate(biomass_her = ifelse(is.na(biomass_her), biomass_her_year_sd_mean, biomass_her),
         biomass_spr = ifelse(is.na(biomass_spr), biomass_spr_year_sd_mean, biomass_spr)) |> 
  mutate(biomass_her = ifelse(is.na(biomass_her), biomass_her_year_mean, biomass_her),
         biomass_spr = ifelse(is.na(biomass_spr), biomass_spr_year_mean, biomass_spr)) |> 
  dplyr::select(-biomass_her_year_mean, -biomass_her_year_mean, 
                -biomass_spr_year_sd_mean, -biomass_spr_year_mean)
group_by: 2 grouping variables (year, sub_div)
mutate (grouped): new variable 'biomass_her_year_sd_mean' (double) with 133 unique values and 9% NA
                  new variable 'biomass_spr_year_sd_mean' (double) with 133 unique values and 9% NA
ungroup: no grouping variables
group_by: one grouping variable (year)
mutate (grouped): new variable 'biomass_her_year_mean' (double) with 28 unique values and 7% NA
                  new variable 'biomass_spr_year_mean' (double) with 28 unique values and 7% NA
ungroup: no grouping variables
mutate: changed 50,514 values (5%) of 'biomass_her' (50514 fewer NA)
        changed 50,514 values (5%) of 'biomass_spr' (50514 fewer NA)
mutate: changed 15,612 values (2%) of 'biomass_her' (15612 fewer NA)
        changed 15,612 values (2%) of 'biomass_spr' (15612 fewer NA)
# Test plot
plot_map_fc + 
  geom_raster(data = cod_pred |> filter(year < 2020), aes(X*1000, Y*1000, fill = biomass_spr)) +
  facet_wrap(~year) + 
  scale_fill_viridis(trans = "sqrt", name = "Sprat biomass (tonnes)")
filter: removed 65,064 rows (7%), 878,364 rows remaining
Warning: Removed 41094 rows containing missing values (`geom_raster()`).

plot_map_fc + 
  geom_raster(data = cod_pred |> filter(year < 2020), aes(X*1000, Y*1000, fill = biomass_her)) +
  facet_wrap(~year) + 
  scale_fill_viridis(trans = "sqrt", name = "Sprat biomass (tonnes)")
filter: removed 65,064 rows (7%), 878,364 rows remaining
Warning: Removed 41094 rows containing missing values (`geom_raster()`).

# Now add in the sub division values (in case we want to look at that also)
biomass_spr_sd <- read_xlsx(paste0(home, "/data/BIAS/N and B per SD 1991-2020.xlsx"),
                            sheet = 4) |>
  mutate(sub_div = ifelse(Sub_Div == "28_2", "28", Sub_Div)) |>
  filter(sub_div %in% c("24", "25", "26", "27", "28")) |>
  rename("Year" = "ANNUS") |>
  mutate_at(vars(`AGE1`, `AGE2`, `AGE3`, `AGE4`, `AGE5`, `AGE6`, `AGE7`, `AGE8+`), ~replace_na(., 0)) |> # I need to replace NA with 0, else I can't sum! According to Olavi who sent the data, NA means 0 and nothing else. Rectangle*year combinations that do not have information about biomass are simply not included in this data
  mutate(sub_div = as.factor(sub_div),
         Species = "Sprat",
         biomass_spr_sd = `AGE1`+`AGE2`+`AGE3`+`AGE4`+`AGE5`+`AGE6`+`AGE7`+`AGE8+`, # omitting `0+` here
         ID_sd_year = paste(sub_div, Year, sep = ".")) |> # Make new ID
  dplyr::select(biomass_spr_sd, ID_sd_year)
mutate: new variable 'sub_div' (character) with 12 unique values and 0% NA
filter: removed 152 rows (51%), 147 rows remaining
rename: renamed one variable (Year)
mutate_at: changed one value (1%) of 'AGE6' (1 fewer NA)
           changed 10 values (7%) of 'AGE7' (10 fewer NA)
           changed 13 values (9%) of 'AGE8+' (13 fewer NA)
mutate: converted 'sub_div' from character to factor (0 new NA)
        new variable 'Species' (character) with one unique value and 0% NA
        new variable 'biomass_spr_sd' (double) with 147 unique values and 0% NA
        new variable 'ID_sd_year' (character) with 147 unique values and 0% NA
biomass_her_sd <- read_xlsx(paste0(home, "/data/BIAS/N and B per SD 1991-2020.xlsx"),
                            sheet = 3) |>
  mutate(sub_div = ifelse(Sub_Div == "28_2", "28", Sub_Div)) |>
  filter(sub_div %in% c("24", "25", "26", "27", "28")) |>
  rename("Year" = "ANNUS") |>
  mutate_at(vars(`AGE1`, `AGE2`, `AGE3`, `AGE4`, `AGE5`, `AGE6`, `AGE7`, `AGE8+`), ~replace_na(., 0)) |> 
  mutate(sub_div = as.factor(sub_div),
         Species = "Sprat",
         biomass_her_sd = `AGE1`+`AGE2`+`AGE3`+`AGE4`+`AGE5`+`AGE6`+`AGE7`+`AGE8+`,
         ID_sd_year = paste(sub_div, Year, sep = ".")) |>
  dplyr::select(biomass_her_sd, ID_sd_year)
mutate: new variable 'sub_div' (character) with 12 unique values and 0% NA
filter: removed 152 rows (51%), 147 rows remaining
rename: renamed one variable (Year)
mutate_at: changed 2 values (1%) of 'AGE7' (2 fewer NA)
           changed 7 values (5%) of 'AGE8+' (7 fewer NA)
mutate: converted 'sub_div' from character to factor (0 new NA)
        new variable 'Species' (character) with one unique value and 0% NA
        new variable 'biomass_her_sd' (double) with 147 unique values and 0% NA
        new variable 'ID_sd_year' (character) with 147 unique values and 0% NA
# Add in the same id to the pred_grid
cod_pred <- cod_pred |> mutate(ID_sd_year = paste(sub_div, year, sep = "."))
mutate: new variable 'ID_sd_year' (character) with 145 unique values and 0% NA
cod_pred <- left_join(cod_pred, biomass_spr_sd)
Joining with `by = join_by(ID_sd_year)`
left_join: added one column (biomass_spr_sd)
> rows only in x 48,144
> rows only in y ( 10)
> matched rows 895,284
> =========
> rows total 943,428
cod_pred <- left_join(cod_pred, biomass_her_sd)
Joining with `by = join_by(ID_sd_year)`
left_join: added one column (biomass_her_sd)
> rows only in x 48,144
> rows only in y ( 10)
> matched rows 895,284
> =========
> rows total 943,428
# Plot. Here I also have NAs. If NA, take the annual average
plot_map_fc + 
  geom_raster(data = cod_pred |> filter(year < 2020), aes(X*1000, Y*1000, fill = biomass_spr_sd)) +
  facet_wrap(~year) + 
  scale_fill_viridis(trans = "sqrt", name = "Sprat biomass (tonnes)")
filter: removed 65,064 rows (7%), 878,364 rows remaining
Warning: Removed 41094 rows containing missing values (`geom_raster()`).

plot_map_fc + 
  geom_raster(data = cod_pred |> filter(year < 2020), aes(X*1000, Y*1000, fill = biomass_her_sd)) +
  facet_wrap(~year) + 
  scale_fill_viridis(trans = "sqrt", name = "Herring biomass (tonnes)")
filter: removed 65,064 rows (7%), 878,364 rows remaining
Warning: Removed 41094 rows containing missing values (`geom_raster()`).

cod_pred <- cod_pred |> 
  group_by(year) |> 
  mutate(biomass_her_mean_sd = mean(biomass_her_sd, na.rm = TRUE),
         biomass_spr_mean_sd = mean(biomass_spr_sd, na.rm = TRUE)) |> 
  ungroup() |> 
  mutate(biomass_her_sd = ifelse(is.na(biomass_her_sd), biomass_her_mean_sd, biomass_her_sd),
         biomass_spr_sd = ifelse(is.na(biomass_spr_sd), biomass_spr_mean_sd, biomass_spr_sd))
group_by: one grouping variable (year)
mutate (grouped): new variable 'biomass_her_mean_sd' (double) with 29 unique values and 3% NA
                  new variable 'biomass_spr_mean_sd' (double) with 29 unique values and 3% NA
ungroup: no grouping variables
mutate: changed 15,612 values (2%) of 'biomass_spr_sd' (15612 fewer NA)
        changed 15,612 values (2%) of 'biomass_her_sd' (15612 fewer NA)
# Save plots with "imputed" biomasses
# subdivision
plot_map_fc + 
  geom_raster(data = cod_pred |> filter(year < 2020), aes(X*1000, Y*1000, fill = biomass_spr_sd)) +
  facet_wrap(~year) + 
  scale_fill_viridis(trans = "sqrt", name = "Sprat biomass (tonnes)")
filter: removed 65,064 rows (7%), 878,364 rows remaining
Warning: Removed 41094 rows containing missing values (`geom_raster()`).

ggsave(paste0(home, "/figures/supp/spatial_sprat_biomass_sd.pdf"), width = 17, height = 17, units = "cm")
Warning: Removed 41094 rows containing missing values (`geom_raster()`).
plot_map_fc + 
  geom_raster(data = cod_pred |> filter(year < 2020), aes(X*1000, Y*1000, fill = biomass_her_sd)) +
  facet_wrap(~year) + 
  scale_fill_viridis(trans = "sqrt", name = "Herring biomass (tonnes)")
filter: removed 65,064 rows (7%), 878,364 rows remaining
Warning: Removed 41094 rows containing missing values (`geom_raster()`).

ggsave(paste0(home, "/figures/supp/spatial_herring_biomass_sd.pdf"), width = 17, height = 17, units = "cm")
Warning: Removed 41094 rows containing missing values (`geom_raster()`).
# Ices rectangle
plot_map_fc + 
  geom_raster(data = cod_pred |> filter(year < 2020), aes(X*1000, Y*1000, fill = biomass_spr)) +
  facet_wrap(~year) + 
  scale_fill_viridis(trans = "sqrt", name = "Sprat biomass (tonnes)")
filter: removed 65,064 rows (7%), 878,364 rows remaining
Warning: Removed 41094 rows containing missing values (`geom_raster()`).

ggsave(paste0(home, "/figures/supp/spatial_sprat_biomass_rec.pdf"), width = 17, height = 17, units = "cm")
Warning: Removed 41094 rows containing missing values (`geom_raster()`).
plot_map_fc + 
  geom_raster(data = cod_pred |> filter(year < 2020), aes(X*1000, Y*1000, fill = biomass_her)) +
  facet_wrap(~year) + 
  scale_fill_viridis(trans = "sqrt", name = "Herring biomass (tonnes)")
filter: removed 65,064 rows (7%), 878,364 rows remaining
Warning: Removed 41094 rows containing missing values (`geom_raster()`).

ggsave(paste0(home, "/figures/supp/spatial_herring_biomass_rec.pdf"), width = 17, height = 17, units = "cm")
Warning: Removed 41094 rows containing missing values (`geom_raster()`).
# Drop NA now (years after 2019 because I don't yet have pelagic data from that time)
cod_pred <- cod_pred |> filter(year < 2020)
filter: removed 65,064 rows (7%), 878,364 rows remaining
# Trim predicted biomass densities
cod_pred <- cod_pred |> filter(est < quantile(est, probs = 0.999))
filter: removed 879 rows (<1%), 877,485 rows remaining

Calculate spatial overlap indices

Remember, pelagic data is from Q4 while saduria data is not resolved on a finer scale than year

# Pelagics
# Calculate overlap indices in space
cod_pel <- cod_pred |> 
  filter(quarter == 4) |> 
  group_by(year, quarter) |> 
  mutate(# sprat
         cod_spr_ovr = asymmalpha_overlapspfn(pred = est, prey = biomass_spr),
         cod_spr_ovr_sd = asymmalpha_overlapspfn(pred = est, prey = biomass_spr_sd),
         
         # herring
         cod_her_ovr = asymmalpha_overlapspfn(pred = est, prey = biomass_her),
         cod_her_ovr_sd = asymmalpha_overlapspfn(pred = est, prey = biomass_her_sd)) |> 
  ungroup()
filter: removed 438,761 rows (50%), 438,724 rows remaining
group_by: 2 grouping variables (year, quarter)
mutate (grouped): new variable 'cod_spr_ovr' (double) with 434,783 unique values and 0% NA
                  new variable 'cod_spr_ovr_sd' (double) with 438,724 unique values and 0% NA
                  new variable 'cod_her_ovr' (double) with 437,262 unique values and 0% NA
                  new variable 'cod_her_ovr_sd' (double) with 438,724 unique values and 0% NA
ungroup: no grouping variables
# Calculate summarised overlaps
cod_pel_sum <- cod_pel |> 
  group_by(year) |> 
  summarise(cod_spr_ovr_tot = asymmalpha_overlapspfn_tot(pred = est, prey = biomass_spr),
            cod_spr_ovr_sd_tot = asymmalpha_overlapspfn_tot(pred = est, prey = biomass_spr_sd),
            
            cod_her_ovr_tot = asymmalpha_overlapspfn_tot(pred = est, prey = biomass_her),
            cod_her_ovr_sd_tot = asymmalpha_overlapspfn_tot(pred = est, prey = biomass_her_sd),
            
            # centre of gravity)
            cod_spr_cog_Y = weighted.mean(Y, cod_spr_ovr),
            cod_spr_cog_X = weighted.mean(X, cod_spr_ovr),
            cod_spr_cog_sd_Y = weighted.mean(Y, cod_spr_ovr_sd),
            cod_spr_cog_sd_X = weighted.mean(X, cod_spr_ovr_sd),
            
            cod_her_cog_Y = weighted.mean(Y, cod_her_ovr),
            cod_her_cog_X = weighted.mean(X, cod_her_ovr),
            cod_her_cog_sd_Y = weighted.mean(Y, cod_her_ovr_sd),
            cod_her_cog_sd_X = weighted.mean(X, cod_her_ovr_sd)) |> 
  ungroup()
group_by: one grouping variable (year)
summarise: now 27 rows and 13 columns, ungrouped
ungroup: no grouping variables
# Saduria
# Calculate overlap indices in space
cod_ben <- cod_pred |> 
  drop_na(density_saduria) |> 
  group_by(year, quarter) |> 
  mutate(cod_sad_ovr = asymmalpha_overlapspfn(pred = est, prey = density_saduria)) |> 
  ungroup()
drop_na: removed 6,966 rows (1%), 870,519 rows remaining
group_by: 2 grouping variables (year, quarter)
mutate (grouped): new variable 'cod_sad_ovr' (double) with 536,514 unique values and 0% NA
ungroup: no grouping variables
# Calculate summarised overlaps
cod_ben_sum <- cod_ben |> 
  drop_na(density_saduria) |> 
  group_by(year, quarter) |> 
  # centre of gravity
  summarise(cod_sad_ovr_tot = asymmalpha_overlapspfn_tot(pred = est, prey = density_saduria),
            
            cod_sad_cog_Y = weighted.mean(Y, cod_sad_ovr),
            cod_sad_cog_X = weighted.mean(X, cod_sad_ovr)) |> 
  ungroup()
drop_na: no rows removed
group_by: 2 grouping variables (year, quarter)
summarise: now 54 rows and 5 columns, one group variable remaining (year)
ungroup: no grouping variables

Plot overlap

# Plot overlap in space
# Sprat
plot_map_fc + 
  geom_raster(data = cod_pel |> filter(year < 2020),
              aes(X*1000, Y*1000, fill = cod_spr_ovr)) +
  facet_wrap(~year) + 
  scale_fill_viridis(trans = "sqrt", name = "Cod-Sprat overlap (ICES Rect.)",
                     option = "mako",
                         # trim extreme high values to make spatial variation more visible
                     na.value = "#DEF5E5FF", limits = c(0, quantile(filter(cod_pel, year < 2020)$cod_spr_ovr, 0.999))) +
  labs(caption = paste("maximum estimated overlap =", round(max(filter(cod_pel, year < 2020)$cod_spr_ovr), digits = 3)))
filter: no rows removed
filter: no rows removed
filter: no rows removed
Warning: Removed 20547 rows containing missing values (`geom_raster()`).

ggsave(paste0(home, "/figures/spr_overlap_space.pdf"), width = 17, height = 17, units = "cm")
Warning: Removed 20547 rows containing missing values (`geom_raster()`).
plot_map_fc + 
  geom_raster(data = cod_pel |> filter(year < 2020),
              aes(X*1000, Y*1000, fill = cod_spr_ovr_sd)) +
  facet_wrap(~year) + 
  scale_fill_viridis(name = "Cod-Sprat overlap", trans = "sqrt",
                     option = "mako")
filter: no rows removed
Warning: Removed 20547 rows containing missing values (`geom_raster()`).

ggsave(paste0(home, "/figures/supp/spr_overlap_space_sd.pdf"), width = 17, height = 17, units = "cm")
Warning: Removed 20547 rows containing missing values (`geom_raster()`).
plot_map + 
  geom_point(data = cod_pel_sum |> filter(year < 2020),
             aes(cod_spr_cog_X*1000, cod_spr_cog_Y*1000, color = year))
filter: no rows removed

ggsave(paste0(home, "/figures/supp/spr_overlap_cog.pdf"), width = 17, height = 17, units = "cm")

plot_map + 
  geom_point(data = cod_pel_sum |> filter(year < 2020),
             aes(cod_spr_cog_sd_X*1000, cod_spr_cog_sd_Y*1000, color = year))
filter: no rows removed

ggsave(paste0(home, "/figures/supp/spr_overlap_cog_sd.pdf"), width = 17, height = 17, units = "cm")

# Herring
plot_map_fc + 
  geom_raster(data = cod_pel |> filter(year < 2020),
              aes(X*1000, Y*1000, fill = cod_her_ovr)) +
  facet_wrap(~year) + 
  scale_fill_viridis(trans = "sqrt", name = "Cod-Herring overlap (ICES Rect.)",
                     option = "mako",
                         # trim extreme high values to make spatial variation more visible
                     na.value = "#DEF5E5FF", limits = c(0, quantile(filter(cod_pel, year < 2020)$cod_her_ovr, 0.999))) +
  labs(caption = paste("maximum estimated overlap =", round(max(filter(cod_pel, year < 2020)$cod_her_ovr), digits = 3)))
filter: no rows removed
filter: no rows removed
filter: no rows removed
Warning: Removed 20547 rows containing missing values (`geom_raster()`).

ggsave(paste0(home, "/figures/her_overlap_space.pdf"), width = 17, height = 17, units = "cm")
Warning: Removed 20547 rows containing missing values (`geom_raster()`).
plot_map_fc + 
  geom_raster(data = cod_pel |> filter(year < 2020),
              aes(X*1000, Y*1000, fill = cod_her_ovr_sd)) +
  facet_wrap(~year) + 
  scale_fill_viridis(trans = "sqrt", name = "Cod-Herring overlap (ICES Rect.)",
                     option = "mako",
                         # trim extreme high values to make spatial variation more visible
                     na.value = "black", limits = c(0, quantile(filter(cod_pel, year < 2020)$cod_her_ovr_sd, 0.999)))
filter: no rows removed
filter: no rows removed
Warning: Removed 20547 rows containing missing values (`geom_raster()`).

ggsave(paste0(home, "/figures/supp/her_overlap_space_sd.pdf"), width = 17, height = 17, units = "cm")
Warning: Removed 20547 rows containing missing values (`geom_raster()`).
plot_map + 
  geom_point(data = cod_pel_sum |> filter(year < 2020),
             aes(cod_her_cog_X*1000, cod_her_cog_Y*1000, color = year))
filter: no rows removed

ggsave(paste0(home, "/figures/supp/her_overlap_cog.pdf"), width = 17, height = 17, units = "cm")

plot_map + 
  geom_point(data = cod_pel_sum |> filter(year < 2020),
             aes(cod_her_cog_sd_X*1000, cod_her_cog_sd_Y*1000, color = year))
filter: no rows removed

ggsave(paste0(home, "/figures/supp/her_overlap_cog_sd.pdf"), width = 17, height = 17, units = "cm")

# Saduria
plot_map_fc + 
  geom_raster(data = cod_ben |> filter(year < 2020 & quarter == 4),
              aes(X*1000, Y*1000, fill = cod_sad_ovr)) +
  facet_wrap(~year) + 
  scale_fill_viridis(name = "Cod-Saduria overlap", trans = "sqrt", option = "mako")
filter: removed 435,278 rows (50%), 435,241 rows remaining
Warning: Removed 20547 rows containing missing values (`geom_raster()`).

ggsave(paste0(home, "/figures/sad_overlap_space_q4.pdf"), width = 17, height = 17, units = "cm")
Warning: Removed 20547 rows containing missing values (`geom_raster()`).
plot_map_fc + 
  geom_raster(data = cod_ben |> filter(year < 2020 & quarter == 1),
              aes(X*1000, Y*1000, fill = cod_sad_ovr)) +
  facet_wrap(~year) + 
  scale_fill_viridis(name = "Cod-Saduria overlap", trans = "sqrt", option = "mako")
filter: removed 435,241 rows (50%), 435,278 rows remaining
Warning: Removed 20547 rows containing missing values (`geom_raster()`).

ggsave(paste0(home, "/figures/supp/sad_overlap_space_q1.pdf"), width = 17, height = 17, units = "cm")
Warning: Removed 20547 rows containing missing values (`geom_raster()`).
plot_map + 
  geom_point(data = cod_ben_sum |> filter(year < 2020 & quarter == 4),
             aes(cod_sad_cog_X*1000, cod_sad_cog_Y*1000, color = year))
filter: removed 27 rows (50%), 27 rows remaining

ggsave(paste0(home, "/figures/supp/sad_overlap_cog_q4.pdf"), width = 17, height = 17, units = "cm")

plot_map + 
  geom_point(data = cod_ben_sum |> filter(year < 2020 & quarter == 1),
             aes(cod_sad_cog_X*1000, cod_sad_cog_Y*1000, color = year))
filter: removed 27 rows (50%), 27 rows remaining

ggsave(paste0(home, "/figures/supp/sad_overlap_cog_q1.pdf"), width = 17, height = 17, units = "cm")

Plot overlap over time

cod_pel_sum_wide <- cod_pel_sum |> 
  pivot_longer(c("cod_spr_ovr_tot", "cod_spr_ovr_sd_tot", "cod_her_ovr_tot", "cod_her_ovr_sd_tot")) |> 
  mutate(Species = ifelse(name %in% c("cod_spr_ovr_tot", "cod_spr_ovr_sd_tot"), "Sprat", "Herring")) |> 
  mutate(Scale = ifelse(name %in% c("cod_her_ovr_sd_tot", "cod_spr_ovr_sd_tot"), "Subdivision", "ICES Rect."))
pivot_longer: reorganized (cod_spr_ovr_tot, cod_spr_ovr_sd_tot, cod_her_ovr_tot, cod_her_ovr_sd_tot) into (name, value) [was 27x13, now 108x11]
mutate: new variable 'Species' (character) with 2 unique values and 0% NA
mutate: new variable 'Scale' (character) with 2 unique values and 0% NA
ggplot(data = cod_pel_sum_wide, aes(year, value)) +
  geom_point(color = "grey20") +
  geom_line() + 
  labs(x = "Year", y = "Asymmetrical alpha overlap index") +
  facet_grid(Scale ~ Species) +
  geom_smooth(method = "gam", formula = y~s(x, k = 4), color = "tomato2") + 
  theme(aspect.ratio = 1)

ggsave(paste0(home, "/figures/pelagic_overlap.pdf"), width = 17, height = 17, units = "cm")

# Saduria
cod_ben_sum |> 
  mutate(quart_lab = ifelse(quarter == 1, "Quarter 1", "Quarter 4")) |> 
  ggplot(aes(year, cod_sad_ovr_tot)) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Asymmetrical alpha overlap index") +
  geom_smooth(method = "gam", formula = y~s(x, k = 4), color = "tomato2") +
  facet_wrap(~quart_lab) + 
  theme(aspect.ratio = 1)
mutate: new variable 'quart_lab' (character) with 2 unique values and 0% NA

ggsave(paste0(home, "/figures/saduria_overlap.pdf"), width = 17, height = 11, units = "cm")